/*
Copyright 2008-2009 Elöd Egyed-Zsigmond, Cyril Laitang
Copyright 2009-2011 Samuel Gesche

This file is part of IPRI News Analyzer.

IPRI News Analyzer is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

IPRI News Analyzer is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with IPRI News Analyzer.  If not, see <http://www.gnu.org/licenses/>.
*/

package zold.proc.tagging;

import data.structures.tagging.*;
import proc.tagging.*;
import data.structures.resources.RessourceItem;
import proc.tagging.TreeTagger;
import data.structures.tagging.LemmaVector;
import data.base.connectors.DBPediaDatabase;
import data.base.Database;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Set;
import proc.text.XMLCleaner;

/**
 * Runs the tagger over the description of every resource item returned by
 * the database and writes the resulting lemmas back.
 *
 * @author Cyril
 */
public class _RessourceLemmatizer {
    /** Database handle; each operation wraps it in a new {@link DBPediaDatabase}. */
    private final Database theDB;

    /**
     * Creates a lemmatizer working on the given database.
     *
     * @param db database handle passed to {@link DBPediaDatabase}
     */
    public _RessourceLemmatizer(Database db) {
        theDB = db;
    }

    /**
     * Lemmatizes the description of every resource item and stores the result.
     *
     * For each item returned by {@code getAllRessourcesItems()}, the
     * description is converted with {@code XMLCleaner.xmlToText}, tagged with
     * {@link TreeTagger}, and the lemmas are set on the item before calling
     * {@code UpdateLemmas}. The first exception aborts the remaining items;
     * it is reported on standard output and not rethrown.
     */
    public void lemmatizeRessources() {
        DBPediaDatabase td = new DBPediaDatabase(theDB);
        TreeTagger tt = new TreeTagger();
        System.out.println("Lemmatization des ressources");
        try {
            // Fetch every resource item from the database.
            Set<RessourceItem> items = td.getAllRessourcesItems();
            for (RessourceItem item : items) {
                // Tag the plain-text version of the description.
                LemmaVector lemmasDesc = tt.processText(XMLCleaner.xmlToText(item.getDesc()));
                // Lemma filtering (lemmasDesc.filtre()) is currently disabled.
                item.setDescLemmas(lemmasDesc.toLemmas());
                // Persist the lemmas that were just set on the item.
                td.UpdateLemmas(item);
                // Insertion into the lemma table (addLemme) is currently disabled.
            }
        } catch (Exception ex) {
            // Print the message verbatim: passing it through String.format would
            // treat any '%' in the exception text as a format specifier and could
            // itself throw from inside this handler.
            System.out.println("Lemme desc insert problem :" + ex.getMessage());
        }
    }

    /**
     * Intended to insert the lemma counts of a description for one resource.
     *
     * NOTE(review): this method looks broken as written and has no active
     * caller in this file. The key set is declared as {@code LemmaInfos} but
     * every key is cast to {@code String}, which presumably fails with a
     * {@code ClassCastException} on the first key (confirm against
     * {@code LemmaInfos}). In addition, the guard only inserts keys that are
     * NOT in the map they were just read from, so {@code InsertLemma} appears
     * unreachable. The logic is left untouched until the intended behavior is
     * confirmed.
     *
     * @param idRessource identifier of the resource, passed to {@code InsertLemma}
     * @param desc        lemma vector whose {@code toLemmasCount()} map is iterated
     */
    public void addLemme(int idRessource, LemmaVector desc) {
        DBPediaDatabase td = new DBPediaDatabase(theDB);
        HashMap<LemmaInfos, Integer> lemmasCountDesc = desc.toLemmasCount();

        // Walk over every key of the description lemma-count map.
        for (Iterator iter = lemmasCountDesc.keySet().iterator(); iter.hasNext();) {
            String lemma = (String) iter.next();
            if (!lemmasCountDesc.containsKey(lemma)) {
                try {
                    td.InsertLemma(idRessource, lemma, 0, lemmasCountDesc.get(lemma));
                } catch (Exception ex) {
                    // Message printed verbatim; see lemmatizeRessources for why
                    // String.format must not be applied to exception text.
                    System.out.println("Lemme desc insert problem :" + ex.getMessage());
                }
            }
        }
    }
}